# change in family size
d_FAM_SIZE_t = FAM_SIZE_t - FAM_SIZE_tm1,
d_NUM_ADTS_t = NUM_ADTS_t - NUM_ADTS_tm1,
d_NUM_KIDS_t = NUM_KIDS_t - NUM_KIDS_tm1,
# change in age
d_AGE_REF_t = AGE_REF_t - AGE_REF_tm1,
d_AGE_2 = AGE2_t - AGE2_tm1,
# 4 main expenditure categories
d_EX_FD_t = as.numeric(format(round(EX_FD_t - EX_FD_tm1,1),scientific=F)),
d_EX_SN_t = as.numeric(format(round(EX_SN_t - EX_SN_tm1,1),scientific=F)),
d_EX_N_t = as.numeric(format(round(EX_N_t - EX_N_tm1,1),scientific=F)),
d_EX_T_t = as.numeric(format(round(EX_T_t - EX_T_tm1,1),scientific=F)),
# food sub-category
d_EX_FD_HM_t = as.numeric(format(round(EX_FD_HM_t - EX_FD_HM_tm1,1),scientific=F)),
d_EX_FD_AW_t = as.numeric(format(round(EX_FD_AW_t - EX_FD_AW_tm1,1),scientific=F)),
d_EX_ALC_t = as.numeric(format(round(EX_ALC_t - EX_ALC_tm1,1),scientific=F)),
# Strict Non-durables
d_EX_UT_HO_t = as.numeric(format(round(EX_UT_HO_t - EX_UT_HO_tm1,1),scientific=F)),
d_EX_PC_MIS_t = as.numeric(format(round(EX_PC_MIS_t - EX_PC_MIS_tm1,1),scientific=F)),
d_EX_TR_GAS_t = as.numeric(format(round(EX_TR_GAS_t - EX_TR_GAS_tm1,1),scientific=F)),
d_EX_TBC_t = as.numeric(format(round(EX_TBC_t - EX_TBC_tm1,1),scientific=F)),
# non-durables
d_EX_APR_t = as.numeric(format(round(EX_APR_t - EX_APR_tm1,1),scientific=F)),
d_EX_HLT_t = as.numeric(format(round(EX_HLT_t - EX_HLT_tm1,1),scientific=F)),
d_EX_READ_t = as.numeric(format(round(EX_READ_t - EX_READ_tm1,1),scientific=F)),
# total
d_EX_HS_t = as.numeric(format(round(EX_HS_t - EX_HS_tm1,1),scientific=F)),
d_EX_EDU_t = as.numeric(format(round(EX_EDU_t - EX_EDU_tm1,1),scientific=F)),
d_EX_ENT_t = as.numeric(format(round(EX_ENT_t - EX_ENT_tm1,1),scientific=F)),
d_EX_TRANS_t = as.numeric(format(round(EX_TRANS_t - EX_TRANS_tm1,1),scientific=F)),
d_EX_CACT_t = as.numeric(format(round(EX_CACT_t - EX_CACT_tm1,1),scientific=F))
)
return(df)
}
#### 3.3 Incorporating first difference into df ####
# To increase coding efficiency, we reduce df to df by quarter
df_20q3 <- df %>% filter(YYMM==2007|YYMM==2008|YYMM==2009)
df_20q4 <- df %>% filter(YYMM==2010|YYMM==2011|YYMM==2012)
df_21q1 <- df %>% filter(YYMM==2101|YYMM==2102|YYMM==2103)
df_21q2 <- df %>% filter(YYMM==2104|YYMM==2105|YYMM==2106)
df_21q3 <- df %>% filter(YYMM==2107|YYMM==2108|YYMM==2109)
# Each df by quarter are merged with the corresponding interviews in the last quarter
df_20q3 <- diff_maker(df_20q3, fmli202)
df_20q4 <- diff_maker(df_20q4, fmli203)
df_21q1 <- diff_maker(df_21q1, fmli204)
df_21q2 <- diff_maker(df_21q2, fmli211)
df_21q3 <- diff_maker(df_21q3, fmli212)
# Binding df by quarters to form a complete df
df <- bind_rows(df_20q3, df_20q4, df_21q1, df_21q2, df_21q3)
df <- df %>%
arrange(ID) %>%
group_by(ID) %>%
arrange(YYMM, .by_group = TRUE) %>%
ungroup()
### 4 Computing average weights, the first documented income, and liquidity ####
# wts_inc_liq_creator extract weights, income, and liquidity from fmli files
wts_inc_liq_creator <- function(fmli){
fmli_weights_income <- fmli %>% select(NEWID,FINLWT21,FINCBTXM,LIQUDYR,LIQUDYRX) %>%
mutate(
ID = substr(as.character(NEWID),1,6),
# if CU reports no liquid accounts, then liquidity is set to 0
LIQUDYRX = ifelse(LIQUDYR==2 & !is.na(LIQUDYR),0,LIQUDYRX)
) %>% select(ID,FINLWT21,FINCBTXM,LIQUDYRX)
return(fmli_weights_income)}
# Apply wts_inc_liq_creator to each fmli file
fmli202_wts_inc_liq <- wts_inc_liq_creator(fmli202_copy)
fmli203_wts_inc_liq <- wts_inc_liq_creator(fmli203_copy)
fmli204_wts_inc_liq <- wts_inc_liq_creator(fmli204_copy)
fmli211_wts_inc_liq <- wts_inc_liq_creator(fmli211_copy)
fmli212_wts_inc_liq <- wts_inc_liq_creator(fmli212_copy)
fmli213_wts_inc_liq <- wts_inc_liq_creator(fmli213_copy)
# Merge to obtain the weights, income, and liquidity in each interview
wts_inc_liq <- merge(fmli202_wts_inc_liq,fmli203_wts_inc_liq,by="ID",all=TRUE)
wts_inc_liq <- wts_inc_liq %>% rename(
FINLWT21_202 = FINLWT21.x, FINLWT21_203 = FINLWT21.y, FINCBTXM_202 = FINCBTXM.x,
FINCBTXM_203 = FINCBTXM.y, LIQUDYRX_202 = LIQUDYRX.x, LIQUDYRX_203 = LIQUDYRX.y)
wts_inc_liq <- merge(wts_inc_liq,fmli204_wts_inc_liq, by="ID", all=TRUE)
wts_inc_liq <- wts_inc_liq %>% rename(
FINLWT21_204 = FINLWT21, FINCBTXM_204 = FINCBTXM, LIQUDYRX_204 = LIQUDYRX)
wts_inc_liq <- merge(wts_inc_liq,fmli211_wts_inc_liq,by="ID",all=TRUE)
wts_inc_liq <- wts_inc_liq %>% rename(
FINLWT21_211 = FINLWT21, FINCBTXM_211 = FINCBTXM, LIQUDYRX_211 = LIQUDYRX)
wts_inc_liq <- merge(wts_inc_liq,fmli212_wts_inc_liq,by="ID",all=TRUE)
wts_inc_liq <- wts_inc_liq %>% rename(
FINLWT21_212 = FINLWT21, FINCBTXM_212 = FINCBTXM, LIQUDYRX_212 = LIQUDYRX)
wts_inc_liq <- merge(wts_inc_liq,fmli213_wts_inc_liq,by="ID",all=TRUE)
wts_inc_liq <- wts_inc_liq %>% rename(
FINLWT21_213 = FINLWT21, FINCBTXM_213 = FINCBTXM, LIQUDYRX_213 = LIQUDYRX)
# Average weights
wts_inc_liq$FINLWT21_AVG <- rowMeans(wts_inc_liq[,c("FINLWT21_202",
"FINLWT21_203",
"FINLWT21_204",
"FINLWT21_211",
"FINLWT21_212",
"FINLWT21_213")],
na.rm=TRUE)
# first income
wts_inc_liq$FINCBTXM_FST <- ifelse(!is.na(wts_inc_liq$FINCBTXM_202),wts_inc_liq$FINCBTXM_202,
ifelse(!is.na(wts_inc_liq$FINCBTXM_203),wts_inc_liq$FINCBTXM_203,
ifelse(!is.na(wts_inc_liq$FINCBTXM_204),wts_inc_liq$FINCBTXM_204,
ifelse(!is.na(wts_inc_liq$FINCBTXM_211),wts_inc_liq$FINCBTXM_211,
ifelse(!is.na(wts_inc_liq$FINCBTXM_212),wts_inc_liq$FINCBTXM_212,wts_inc_liq$FINCBTXM_213)))))
# liquidity
wts_inc_liq$LIQUDYRX <- ifelse(!is.na(wts_inc_liq$LIQUDYRX_202),wts_inc_liq$LIQUDYRX_202,
ifelse(!is.na(wts_inc_liq$LIQUDYRX_203),wts_inc_liq$LIQUDYRX_203,
ifelse(!is.na(wts_inc_liq$LIQUDYRX_204),wts_inc_liq$LIQUDYRX_204,
ifelse(!is.na(wts_inc_liq$LIQUDYRX_211),wts_inc_liq$LIQUDYRX_211,
ifelse(!is.na(wts_inc_liq$LIQUDYRX_212),wts_inc_liq$LIQUDYRX_212,wts_inc_liq$LIQUDYRX_213)))))
wts_inc_liq <- wts_inc_liq %>% select(ID,FINLWT21_AVG,FINCBTXM_FST,LIQUDYRX)
### 5 Computing average expenditure (for scaling) ####
#### 5.1 fmli_expd_creator select expd variables ####
fmli_expd_creator <- function(fmli){
# Only the expd variables
fmli <- fmli %>% select(1,11:29)
return(fmli)
}
# Apply to each fmli
fmli202_expd <- fmli_expd_creator(fmli202)
fmli203_expd <- fmli_expd_creator(fmli203)
fmli204_expd <- fmli_expd_creator(fmli204)
fmli211_expd <- fmli_expd_creator(fmli211)
fmli212_expd <- fmli_expd_creator(fmli212)
fmli213_expd <- fmli_expd_creator(fmli213)
#### 5.2 Merge expd in all periods ####
expd <- merge(fmli202_expd,fmli203_expd,by="ID",all=TRUE)
expd <- expd %>% rename(
EX_FD_202 = EX_FD.x, EX_SN_202 = EX_SN.x, EX_N_202 = EX_N.x, EX_T_202 = EX_T.x,
EX_FD_HM_202 = EX_FD_HM.x, EX_FD_AW_202 = EX_FD_AW.x, EX_ALC_202 = EX_ALC.x,
EX_UT_HO_202 = EX_UT_HO.x, EX_PC_MIS_202 = EX_PC_MIS.x, EX_TR_GAS_202 = EX_TR_GAS.x,
EX_TBC_202 = EX_TBC.x, EX_APR_202 = EX_APR.x, EX_HLT_202 = EX_HLT.x, EX_READ_202 = EX_READ.x,
EX_HS_202 = EX_HS.x, EX_EDU_202 = EX_EDU.x, EX_ENT_202 = EX_ENT.x, EX_TRANS_202 = EX_TRANS.x,
EX_CACT_202 = EX_CACT.x,
EX_FD_203 = EX_FD.y, EX_SN_203 = EX_SN.y, EX_N_203 = EX_N.y, EX_T_203 = EX_T.y,
EX_FD_HM_203 = EX_FD_HM.y, EX_FD_AW_203 = EX_FD_AW.y, EX_ALC_203 = EX_ALC.y,
EX_UT_HO_203 = EX_UT_HO.y, EX_PC_MIS_203 = EX_PC_MIS.y, EX_TR_GAS_203 = EX_TR_GAS.y,
EX_TBC_203 = EX_TBC.y, EX_APR_203 = EX_APR.y, EX_HLT_203 = EX_HLT.y, EX_READ_203 = EX_READ.y,
EX_HS_203 = EX_HS.y, EX_EDU_203 = EX_EDU.y, EX_ENT_203 = EX_ENT.y, EX_TRANS_203 = EX_TRANS.y,
EX_CACT_203 = EX_CACT.y)
# merge expd with fmli204
expd <- merge(expd,fmli204_expd,by="ID",all=TRUE)
expd <- expd %>% rename(
EX_FD_204 = EX_FD, EX_SN_204 = EX_SN, EX_N_204 = EX_N, EX_T_204 = EX_T,
EX_FD_HM_204 = EX_FD_HM, EX_FD_AW_204 = EX_FD_AW, EX_ALC_204 = EX_ALC,
EX_UT_HO_204 = EX_UT_HO, EX_PC_MIS_204 = EX_PC_MIS, EX_TR_GAS_204 = EX_TR_GAS,
EX_TBC_204 = EX_TBC, EX_APR_204 = EX_APR, EX_HLT_204 = EX_HLT, EX_READ_204 = EX_READ,
EX_HS_204 = EX_HS, EX_EDU_204 = EX_EDU, EX_ENT_204 = EX_ENT, EX_TRANS_204 = EX_TRANS,
EX_CACT_204 = EX_CACT)
# merge expd with fmli211
expd <- merge(expd,fmli211_expd,by="ID",all=TRUE)
expd <- expd %>% rename(
EX_FD_211 = EX_FD, EX_SN_211 = EX_SN, EX_N_211 = EX_N, EX_T_211 = EX_T,
EX_FD_HM_211 = EX_FD_HM, EX_FD_AW_211 = EX_FD_AW, EX_ALC_211 = EX_ALC,
EX_UT_HO_211 = EX_UT_HO, EX_PC_MIS_211 = EX_PC_MIS, EX_TR_GAS_211 = EX_TR_GAS,
EX_TBC_211 = EX_TBC, EX_APR_211 = EX_APR, EX_HLT_211 = EX_HLT, EX_READ_211 = EX_READ,
EX_HS_211 = EX_HS, EX_EDU_211 = EX_EDU, EX_ENT_211 = EX_ENT, EX_TRANS_211 = EX_TRANS,
EX_CACT_211 = EX_CACT)
# merge expd with fmli212
expd <- merge(expd,fmli212_expd,by="ID",all=TRUE)
expd <- expd %>% rename(
EX_FD_212 = EX_FD, EX_SN_212 = EX_SN, EX_N_212 = EX_N, EX_T_212 = EX_T,
EX_FD_HM_212 = EX_FD_HM, EX_FD_AW_212 = EX_FD_AW, EX_ALC_212 = EX_ALC,
EX_UT_HO_212 = EX_UT_HO, EX_PC_MIS_212 = EX_PC_MIS, EX_TR_GAS_212 = EX_TR_GAS,
EX_TBC_212 = EX_TBC, EX_APR_212 = EX_APR, EX_HLT_212 = EX_HLT, EX_READ_212 = EX_READ,
EX_HS_212 = EX_HS, EX_EDU_212 = EX_EDU, EX_ENT_212 = EX_ENT, EX_TRANS_212 = EX_TRANS,
EX_CACT_212 = EX_CACT)
# merge expd with fmli213
expd <- merge(expd,fmli213_expd,by="ID",all=TRUE)
expd <- expd %>% rename(
EX_FD_213 = EX_FD, EX_SN_213 = EX_SN, EX_N_213 = EX_N, EX_T_213 = EX_T,
EX_FD_HM_213 = EX_FD_HM, EX_FD_AW_213 = EX_FD_AW, EX_ALC_213 = EX_ALC,
EX_UT_HO_213 = EX_UT_HO, EX_PC_MIS_213 = EX_PC_MIS, EX_TR_GAS_213 = EX_TR_GAS,
EX_TBC_213 = EX_TBC, EX_APR_213 = EX_APR, EX_HLT_213 = EX_HLT, EX_READ_213 = EX_READ,
EX_HS_213 = EX_HS, EX_EDU_213 = EX_EDU, EX_ENT_213 = EX_ENT, EX_TRANS_213 = EX_TRANS,
EX_CACT_213 = EX_CACT)
#### 5.3 Computing average expenditures for scaling #####
expd <- expd %>% mutate(
EX_FD_AVG = rowMeans(expd[,c("EX_FD_202","EX_FD_203","EX_FD_204",
"EX_FD_211", "EX_FD_212", "EX_FD_213")], na.rm=TRUE),
EX_SN_AVG = rowMeans(expd[,c("EX_SN_202","EX_SN_203","EX_SN_204",
"EX_SN_211", "EX_SN_212", "EX_SN_213")], na.rm=TRUE),
EX_N_AVG = rowMeans(expd[,c("EX_N_202","EX_N_203","EX_N_204",
"EX_N_211", "EX_N_212", "EX_N_213")], na.rm=TRUE),
EX_T_AVG = rowMeans(expd[,c("EX_T_202","EX_T_203","EX_T_204",
"EX_T_211", "EX_T_212","EX_T_213")], na.rm=TRUE),
EX_FD_HM_AVG = rowMeans(expd[,c("EX_FD_HM_202","EX_FD_HM_203","EX_FD_HM_204",
"EX_FD_HM_211", "EX_FD_HM_212","EX_FD_HM_213" )], na.rm=TRUE),
EX_FD_AW_AVG = rowMeans(expd[,c("EX_FD_AW_202","EX_FD_AW_203","EX_FD_AW_204",
"EX_FD_AW_211", "EX_FD_AW_212", "EX_FD_AW_213")], na.rm=TRUE),
EX_ALC_AVG = rowMeans(expd[,c("EX_ALC_202","EX_ALC_203","EX_ALC_204",
"EX_ALC_211", "EX_ALC_212","EX_ALC_213")], na.rm=TRUE),
EX_UT_HO_AVG = rowMeans(expd[,c("EX_UT_HO_202","EX_UT_HO_203","EX_UT_HO_204",
"EX_UT_HO_211", "EX_UT_HO_212","EX_UT_HO_213" )], na.rm=TRUE),
EX_PC_MIS_AVG = rowMeans(expd[,c("EX_PC_MIS_202","EX_PC_MIS_203","EX_PC_MIS_204",
"EX_PC_MIS_211", "EX_PC_MIS_212", "EX_PC_MIS_213")], na.rm=TRUE),
EX_TR_GAS_AVG = rowMeans(expd[,c("EX_TR_GAS_202","EX_TR_GAS_203","EX_TR_GAS_204",
"EX_TR_GAS_211", "EX_TR_GAS_212", "EX_TR_GAS_213")], na.rm=TRUE),
EX_TBC_AVG = rowMeans(expd[,c("EX_TBC_202","EX_TBC_203","EX_TBC_204",
"EX_TBC_211", "EX_TBC_212", "EX_TBC_213")], na.rm=TRUE),
EX_APR_AVG = rowMeans(expd[,c("EX_APR_202","EX_APR_203","EX_APR_204",
"EX_APR_211", "EX_APR_212","EX_APR_213")], na.rm=TRUE),
EX_HLT_AVG = rowMeans(expd[,c("EX_HLT_202","EX_HLT_203","EX_HLT_204",
"EX_HLT_211", "EX_HLT_212","EX_HLT_213")], na.rm=TRUE),
EX_READ_AVG = rowMeans(expd[,c("EX_READ_202","EX_READ_203","EX_READ_204",
"EX_READ_211", "EX_READ_212","EX_READ_213")], na.rm=TRUE),
EX_HS_AVG = rowMeans(expd[,c("EX_HS_202","EX_HS_203","EX_HS_204",
"EX_HS_211", "EX_HS_212","EX_HS_213")], na.rm=TRUE),
EX_EDU_AVG = rowMeans(expd[,c("EX_EDU_202","EX_EDU_203","EX_EDU_204",
"EX_EDU_211", "EX_EDU_212","EX_EDU_213")], na.rm=TRUE),
EX_ENT_AVG = rowMeans(expd[,c("EX_ENT_202","EX_ENT_203","EX_ENT_204",
"EX_ENT_211", "EX_ENT_212","EX_ENT_213" )], na.rm=TRUE),
EX_TRANS_AVG = rowMeans(expd[,c("EX_TRANS_202","EX_TRANS_203","EX_TRANS_204",
"EX_TRANS_211", "EX_TRANS_212","EX_TRANS_213")], na.rm=TRUE),
EX_CACT_AVG = rowMeans(expd[,c("EX_CACT_202","EX_CACT_203","EX_CACT_204",
"EX_CACT_211", "EX_CACT_212","EX_CACT_213")], na.rm=TRUE)) %>%
select(ID,EX_FD_AVG, EX_SN_AVG, EX_N_AVG, EX_T_AVG, EX_FD_HM_AVG, EX_FD_AW_AVG,
EX_ALC_AVG, EX_UT_HO_AVG, EX_PC_MIS_AVG, EX_TR_GAS_AVG, EX_TBC_AVG, EX_APR_AVG,
EX_HLT_AVG, EX_READ_AVG, EX_HS_AVG, EX_EDU_AVG, EX_ENT_AVG, EX_TRANS_AVG, EX_CACT_AVG) # Only ID and averages
### 6 Merge 4 and 5 results into df and re-arrange variables ####
#### 6.1 Merge weights, income, liquidity, and average expenditure into df ####
# merge df with weights, income, and liquidity
df <- merge(df, wts_inc_liq, by="ID")
# merge df with avergae expenditure
df <- merge(df, expd, by="ID")
#### 6.2 Final re-arrange ####
df <- df %>% select(
ID, NEWID, YYMM, INTERI, FINLWT21_AVG,FINCBTXM_FST,LIQUDYRX,
# basic EIPs
EIPI_t, EIPI_tm1, EIPI_tm2, EIPI_tm3,
EIPII_t, EIPII_t_count, EIPII_tm1, EIPII_tm2, EIPII_tm3,
EIPIII_t, EIPIII_tm1, EIPIII_tm2, EIPIII_tm3,
# Basic expenditure
d_EX_FD_t, d_EX_SN_t, d_EX_N_t, d_EX_T_t,
# Controls
d_NUM_ADTS_t, d_NUM_KIDS_t, AGE_AVG_t,
# Demographics for cleaning
d_AGE_REF_t, d_AGE_2, d_FAM_SIZE_t, NUM_KIDS_t, NUM_ADTS_t, FAM_SIZE_t,
AGE_REF_t, AGE2_t, SEX_REF_t, SEX_REF_tm1, MARITAL1_t, MARITAL1_tm1, CUTENURE_t,
NUM_KIDS_tm1, NUM_ADTS_tm1,
# More EIPIs
EIPI_by_ck_t, EIPI_by_dd_t, EIPI_by_dc_t, EIPI_for_ep_t, EIPI_for_debt_t, EIPI_for_sv_t,
EIPI_by_ck_tm1, EIPI_by_dd_tm1, EIPI_by_dc_tm1, EIPI_for_ep_tm1, EIPI_for_debt_tm1, EIPI_for_sv_tm1,
EIPI_by_ck_tm2, EIPI_by_dd_tm2, EIPI_by_dc_tm2, EIPI_for_ep_tm2, EIPI_for_debt_tm2, EIPI_for_sv_tm2,
EIPI_by_ck_tm3, EIPI_by_dd_tm3, EIPI_by_dc_tm3, EIPI_for_ep_tm3, EIPI_for_debt_tm3, EIPI_for_sv_tm3,
# More EIPIIs
EIPII_by_ck_t, EIPII_by_dd_t, EIPII_by_dc_t, EIPII_for_ep_t, EIPII_for_debt_t, EIPII_for_sv_t,
EIPII_by_ck_tm1, EIPII_by_dd_tm1, EIPII_by_dc_tm1, EIPII_for_ep_tm1, EIPII_for_debt_tm1, EIPII_for_sv_tm1,
EIPII_by_ck_tm2, EIPII_by_dd_tm2, EIPII_by_dc_tm2, EIPII_for_ep_tm2, EIPII_for_debt_tm2, EIPII_for_sv_tm2,
EIPII_by_ck_tm3, EIPII_by_dd_tm3, EIPII_by_dc_tm3, EIPII_for_ep_tm3, EIPII_for_debt_tm3, EIPII_for_sv_tm3,
# More EIPIIIs
EIPIII_by_ck_t, EIPIII_by_dd_t, EIPIII_by_dc_t, EIPIII_for_ep_t, EIPIII_for_debt_t, EIPIII_for_sv_t,
EIPIII_by_ck_tm1, EIPIII_by_dd_tm1, EIPIII_by_dc_tm1, EIPIII_for_ep_tm1, EIPIII_for_debt_tm1, EIPIII_for_sv_tm1,
EIPIII_by_ck_tm2, EIPIII_by_dd_tm2, EIPIII_by_dc_tm2, EIPIII_for_ep_tm2, EIPIII_for_debt_tm2, EIPIII_for_sv_tm2,
EIPIII_by_ck_tm3, EIPIII_by_dd_tm3, EIPIII_by_dc_tm3, EIPIII_for_ep_tm3, EIPIII_for_debt_tm3, EIPIII_for_sv_tm3,
# EIP by month
# EIPI_apr_t, EIPI_may_t, EIPI_jun_t, EIPI_jul_t, EIPI_aug_t, EIPI_sep_t, EIPI_oct_t,
# EIPI_nov_t, EIPII_dec_t, EIPII_jan_t, EIPII_feb_t,  EIPIII_mar_t, EIPIII_apr_t, EIPIII_may_t,
# EIPIII_jun_t, EIPIII_jul_t, EIPIII_aug_t,
# EIP Status dummies
r, ck, dd, dc, ck_dd, ck_dc, dd_dc, ck_dd_dc,
ep, debt, sv, ep_debt, ep_sv, debt_sv, ep_debt_sv,
# Expenditure levels
EX_FD_t, EX_FD_tm1, EX_SN_t, EX_SN_tm1, EX_N_t, EX_N_tm1, EX_T_t, EX_T_tm1,
# Expenditure Sub-categories
d_EX_FD_HM_t, d_EX_FD_AW_t, d_EX_ALC_t, d_EX_UT_HO_t, d_EX_PC_MIS_t, d_EX_TR_GAS_t,
d_EX_TBC_t, d_EX_APR_t, d_EX_HLT_t, d_EX_READ_t, d_EX_HS_t, d_EX_EDU_t, d_EX_ENT_t,
d_EX_TRANS_t, d_EX_CACT_t,
# Scalers
EX_FD_AVG, EX_SN_AVG, EX_N_AVG, EX_T_AVG,
EX_FD_HM_AVG, EX_FD_AW_AVG,
EX_ALC_AVG, EX_UT_HO_AVG, EX_PC_MIS_AVG, EX_TR_GAS_AVG, EX_TBC_AVG, EX_APR_AVG,
EX_HLT_AVG, EX_READ_AVG, EX_HS_AVG, EX_EDU_AVG, EX_ENT_AVG, EX_TRANS_AVG,
EX_CACT_AVG
)
### 7 Clean df to get the two samples ####
#### 7.1 All households sample ####
# We reduce the sample step by step s
# so we can trace out the change in sample size after each step
# Drop if lives in student housing
df_all_cu <- df %>% filter(CUTENURE_t != 6)
# Drop if age_ref < 21 or age2 < 21 (or > 85)
df_all_cu <- df_all_cu %>% filter(AGE_REF_t >= 21 & AGE_REF_t <= 85)
df_all_cu <- df_all_cu %>% filter(is.na(AGE2_t) | (AGE2_t >= 21 & AGE2_t <= 85))
# Drop if age_ref change is greater than 1 or less than 0 (if the sex of the reference person is the same)
df_all_cu$drop <- ifelse(((df_all_cu$d_AGE_REF_t > 1 | df_all_cu$d_AGE_REF_t < 0) & df_all_cu$SEX_REF_t == df_all_cu$SEX_REF_tm1), 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0)
#  Drop if age2 change is greater than 1 or less than 0 (if the reference person has the same sex or marital status)
df_all_cu$drop <- ifelse(((df_all_cu$d_AGE_2 > 1 | df_all_cu$d_AGE_2 < 0) & df_all_cu$SEX_REF_t == df_all_cu$SEX_REF_tm1
& df_all_cu$MARITAL1_t == df_all_cu$MARITAL1_tm1), 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0|is.na(df_all_cu$drop))
# Drop if change in family size is greater than or less than 3 in absolute values
df_all_cu <- df_all_cu %>% filter (d_FAM_SIZE_t <= 3 & d_FAM_SIZE_t >= -3)
# Drop bottom 1 percent of CUs in terms of nondurable consumption after adjustment for CU size and time trend
df_all_cu$EX_N_PC <- df_all_cu$EX_N_t / (df_all_cu$NUM_ADTS_t + 0.6 *df_all_cu$NUM_KIDS_t)
df_all_cu <- df_all_cu %>% mutate(
TT = ifelse(YYMM==2008,0,
ifelse(YYMM==2009,1,
ifelse(YYMM==2010,2,
ifelse(YYMM==2011,3,
ifelse(YYMM==2012,4,
ifelse(YYMM==2101,5,
ifelse(YYMM==2102,6,
ifelse(YYMM==2103,7,
ifelse(YYMM==2104,8,
ifelse(YYMM==2105,9,
ifelse(YYMM==2106,10,
ifelse(YYMM==2107,11,
ifelse(YYMM==2108,12,13))))))))))))))
# Quantile regression of per capita consumption on time trend for the bottom 1%
qr_bot <- rq(data=df_all_cu,EX_N_PC~TT,tau=0.01)
summary(qr_bot)
df_all_cu$fit_val_bot <- qr_bot[["fitted.values"]]
df_all_cu$drop <- ifelse(df_all_cu$fit_val_bot > df_all_cu$EX_N_PC, 1, 0)
df_all_cu <- df_all_cu %>% filter(df_all_cu$drop==0) %>% select(-c(EX_N_PC,fit_val_bot,drop,TT))
write.csv(df_all_cu,"df_all_cu.csv",row.names = FALSE)
#### 7.2 Final Panel ####
# drop if lives in student housing
df_f <- df %>% filter(CUTENURE_t != 6)
# drop if age_ref < 21 or age2 < 21
df_f <- df_f %>% filter(AGE_REF_t >= 21)
df_f <- df_f %>% filter(is.na(AGE2_t) | AGE2_t >= 21)
# drop if age_ref change is greater than 1 or less than 0 (if the sex of the reference person is the same)
df_f$drop <- ifelse(((df_f$d_AGE_REF_t > 1 | df_f$d_AGE_REF_t < 0) & df_f$SEX_REF_t == df_f$SEX_REF_tm1), 1, 0)
df_f <- df_f %>% filter(df_f$drop==0)
#  drop if age2 change is greater than 1 or less than 0 (if the reference person has the same sex or marital status)
df_f$drop <- ifelse(((df_f$d_AGE_2 > 1 | df_f$d_AGE_2 < 0) & df_f$SEX_REF_t == df_f$SEX_REF_tm1
& df_f$MARITAL1_t == df_f$MARITAL1_tm1), 1, 0)
df_f <- df_f %>% filter(df_f$drop==0|is.na(df_f$drop))
# drop if change in family size is greater than or less than 3 in absolute values
df_f <- df_f %>% filter (d_FAM_SIZE_t <= 3 & d_FAM_SIZE_t >= -3)
# drop bottom 1 percent of CUs in terms of non-durable consumption in each month after adjustment for CU size
df_f$EX_N_PC <- df_f$EX_N_t / (df_f$NUM_ADTS_t + 0.6 *df_f$NUM_KIDS_t)
df_f %>%
group_by(YYMM) %>%
summarize(quant1 = quantile(EX_N_PC, probs = 0.01)) %>%
ungroup()
df_f$drop <- ifelse((df_f$YYMM == 2008 & df_f$EX_N_PC <= 850), 1,
ifelse((df_f$YYMM == 2009 & df_f$EX_N_PC <= 769), 1,
ifelse((df_f$YYMM == 2010 & df_f$EX_N_PC <= 922), 1,
ifelse((df_f$YYMM == 2011 & df_f$EX_N_PC <= 614), 1,
ifelse((df_f$YYMM == 2012 & df_f$EX_N_PC <= 614), 1,
ifelse((df_f$YYMM == 2101 & df_f$EX_N_PC <= 751), 1,
ifelse((df_f$YYMM == 2102 & df_f$EX_N_PC <= 671), 1,
ifelse((df_f$YYMM == 2103 & df_f$EX_N_PC <= 767), 1,
ifelse((df_f$YYMM == 2104 & df_f$EX_N_PC <= 798), 1,
ifelse((df_f$YYMM == 2105 & df_f$EX_N_PC <= 727), 1,
ifelse((df_f$YYMM == 2106 & df_f$EX_N_PC <= 679), 1,
ifelse((df_f$YYMM == 2107 & df_f$EX_N_PC <= 713), 1,
ifelse((df_f$YYMM == 2108 & df_f$EX_N_PC <= 464), 1,
ifelse((df_f$YYMM == 2109 & df_f$EX_N_PC <= 653), 1,0
))))))))))))))
df_f <- df_f %>% filter(df_f$drop==0)
# Drop high income
df_f$MARITAL_t <- ifelse(df_f$MARITAL1_t == 1, 1, 0)
#### Income cutoff table ####
# For single, without kids
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 75000 & FINCBTXM_FST > 50000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 100000 & FINCBTXM_FST > 75000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
# table(check$r)
# For single, with kids
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
# table(check$r)
# For married couple, no kids
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
# table(check$r)
# For married couple, with kids
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
# table(check$r)
# For adults, no kids
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
# table(check$r)
# For adults, with kids
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
# table(check$r)
#
# check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
# table(check$r)
# cutoff
df_f$drop <- ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t==1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 125000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t==1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 175000), 0,
ifelse((df_f$MARITAL_t==1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 275000), 0,
ifelse((df_f$MARITAL_t==1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 275000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t > 1 & df_f$NUM_KIDS_t ==0 & df_f$FINCBTXM_FST <= 250000), 0,
ifelse((df_f$MARITAL_t==0 & df_f$NUM_ADTS_t > 1 & df_f$NUM_KIDS_t >0 & df_f$FINCBTXM_FST <= 275000), 0, 1))))))
df_f <- df_f %>% filter(df_f$drop==0) %>% select(-c(EX_N_PC, drop, MARITAL_t))
write.csv(df_f,"df_f.csv", row.names = FALSE)
### 8 Create imputed value of EIP2 ###
# Imprt imputed EIP data
imput_eip <- read_excel("Raw data/imputed_eip.xlsx")
#### 8.1 Restrict df_f to only observations interviewed in 2103 or 2104
df_f_imp2 <- df_f %>% group_by(ID) %>% filter(any(YYMM != 2102))
df_f_imp <- df_f %>% group_by(ID) %>% filter(any(YYMM == 2103 | YYMM == 2104 | YYMM == 2006 | YYMM == 2007 |
YYMM == 2009 | YYMM == 2110 | YYMM == 2012 | YYMM == 2101 |
YYMM == 2106 | YYMM == 2107 | YYMM == 2109 | YYMM == 2110))
#### 8.2 Mege df_f_imp with imputed values in IMPUT_EIP.xlsx
####    These imputations were created using tax unit level data on AGI and number of dependents internal to the BLS
df_f_imp <- merge(df_f_imp, imput_eip, by.x="NEWID", by.y = "newid")
#### 8.3 Create imputed value for EIPII_t
df_f_imp <- df_f_imp %>% mutate(imp_eip2t = ifelse(EIPII_t == imp_eip2_1, imp_eip2_1,
ifelse(EIPII_t == imp_eip2_2, imp_eip2_2,
ifelse(EIPII_t == imp_eip2_3, imp_eip2_3,
ifelse(EIPII_t == imp_eip2_4, imp_eip2_4,0)))))
df_f_imp <- df_f_imp %>% mutate(imp_eip2t = ifelse((YYMM == 2103 | YYMM == 2104) & EIPII_t == 0,
ifelse(INTERI == 1,imp_eip2_1,
ifelse(INTERI == 2,imp_eip2_2,
ifelse(INTERI == 3,imp_eip2_3,
ifelse(INTERI == 4,imp_eip2_4,-100)))),imp_eip2t))
df_f_imp <- df_f_imp %>% mutate(imp_eip2t = ifelse((YYMM == 2103 | YYMM == 2104) & EIPII_t > 0 & imp_eip2t == 0,
ifelse(INTERI == 1,imp_eip2_1,
ifelse(INTERI == 2,imp_eip2_2,
ifelse(INTERI == 3,imp_eip2_3,
ifelse(INTERI == 4,imp_eip2_4,-200)))),imp_eip2t))
#### 8.4 Create imputed value for EIPII_tm1
df_f_imp <- df_f_imp %>% group_by(ID) %>% arrange(YYMM, .by_group = TRUE) %>%
mutate(imp_eip2tm1 = lag(imp_eip2t, n=1, default = 0),
imp_eip2tm2 = lag(imp_eip2t, n=2, default = 0),
imp_eip2tm3 = lag(imp_eip2t, n=3, default = 0))
df_f_imp <- df_f_imp %>% select(-c(imp_eip2_1,imp_eip2_2,imp_eip2_3,imp_eip2_4,imp_eip3_1,imp_eip3_2,imp_eip3_3,imp_eip3_4))
#### 8.5 Assign lagged imputed EIPII value to CUs without observation in 2103 or 2004
df_f_imp <- df_f_imp %>% mutate(imp_eip2tm1 = ifelse((YYMM == 2106 | YYMM == 2107) & imp_eip2tm1 == 0,
ifelse(INTERI == 1, NA,
ifelse(INTERI == 2, imp_eip2_1,
ifelse(INTERI == 3, imp_eip2_2,
ifelse(INTERI == 4, imp_eip2_4,-300)))),imp_eip2tm1))
df_f_imp <- df_f_imp %>% mutate(imp_eip2tm2 = ifelse((YYMM == 2111 | YYMM == 2112) & imp_eip2tm2 == 0,
ifelse(INTERI == 1, NA,
ifelse(INTERI == 2, NA,
ifelse(INTERI == 3, imp_eip2_1,
ifelse(INTERI == 4, imp_eip2_2,-300)))),imp_eip2tm2))
#### 8.6 Create categorical representation of observed and imputed EIP value
df_f_imp <- df_f_imp %>% mutate(eip2t_cat = ifelse(EIPII_t > 0 & EIPII_t < 600,1,
ifelse(EIPII_t == 600,2,
ifelse(EIPII_t > 600 & EIPII_t < 1200,3,
ifelse(EIPII_t == 1200,4,
ifelse(EIPII_t > 1200 & EIPII_t < 1800,5,
ifelse(EIPII_t == 1800,6,
ifelse(EIPII_t > 1800 & EIPII_t < 2400,7,
ifelse(EIPII_t == 2400,8,
ifelse(EIPII_t > 2400,9,0))))))))))
df_f_imp <- df_f_imp %>% mutate(impeip2t_cat = ifelse(imp_eip2t > 0 & imp_eip2t < 600,1,
ifelse(imp_eip2t == 600,2,
ifelse(imp_eip2t > 600 & imp_eip2t < 1200,3,
ifelse(imp_eip2t == 1200,4,
ifelse(imp_eip2t > 1200 & imp_eip2t < 1800,5,
ifelse(imp_eip2t == 1800,6,
ifelse(imp_eip2t > 1800 & imp_eip2t < 2400,7,
ifelse(imp_eip2t == 2400,8,
ifelse(imp_eip2t > 2400,9,0))))))))))
write.csv(df_f_imp,"df_f_imp.csv", row.names = FALSE)
